rm(list = ls())
here::i_am(file.path("code", "07_iv_tailincome.R"))
library(here)
source(here("code",  "config.R"))

est_df <- read_parquet(here("data", "analysis", "analysis.parquet"))

inc_vars <- c("occscore_c1930", "incwage_pred_c1930", "presgl_c1930", "sei_c1930", "npboss50_c1930")
quantiles <- c(50, 75, 90, 95, 99)

est_df <- est_df |>
  mutate(across(all_of(inc_vars), \(x) ifelse(empstat_c1930 == 20 | empstat_c1930 == 30, 0, x)))

for (v in inc_vars) {
  for (q in quantiles) {
    est_df[[paste0(v, "_q", q)]] <- est_df[[v]] >= quantile(est_df[[v]], q/100, na.rm = T)
  }
}

lhs <- do.call(paste0, expand.grid(inc_vars, "_q", quantiles))

res <- feols(
  .[lhs] ~ 1 |
      board_identifier + birthyr_cards + bpl_cards + exemption^married_cards +
    farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = est_df,
  cluster = ~ serial_num
)

p <- coeftable(res) |>
  as_tibble() |>
  separate(lhs, into = c("lhs", "q"), sep = "_q") |>
  mutate(
    lhs = factor(
      lhs,
      levels = inc_vars,
      labels = c("Occ. score", "Predicted inc.", "Prestige score", "Duncan SEI", "Nam-Powers-Boyd")
    ),
    q = paste0(">", q, "th")
  ) |>
  ggplot(aes(x = q, y = Estimate, ymin = Estimate - 1.96*`Std. Error`, ymax = Estimate + 1.96*`Std. Error`, shape = lhs)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  labs(
    x = "Quantile",
    y = "TSLS coefficient on veteran",
    color = "",
    shape = ""
  ) +
  my_theme() +
  ylim(-.15, .15) +
  guides(color = guide_legend(nrow = 2), shape = guide_legend(nrow = 2))

p_color <- p +
  geom_point(aes(color = lhs), size = 2, position = position_dodge2(.5)) +
  geom_linerange(aes(color = lhs), position = position_dodge2(.5))
ggsave(file.path(fig_dir, "color", "tail_income.pdf"), plot = p_color, width = 5, height = 3.2)

p_bw <- p +
  geom_point(size = 2, position = position_dodge2(.5)) +
  geom_linerange(position = position_dodge2(.5))
ggsave(file.path(fig_dir, "bw", "tail_income.pdf"), plot = p_bw, width = 5, height = 3.2)
